Ghost crab burrows were sampled on Nanny Goat Beach from October 21 to October 24, 2018 during the Sapelo Field Course 2018. Transects perpendicular to the tide line, from the tide line to the edge of the dunes/backdunes transition, were done every 2 meter from the Pavillon to the very south of the Beach, where the creeks enters the marsh.
data <- read.csv("~/Dropbox/Ghost Crabs Sapelo/all-burrows-2018.csv", header=TRUE)
summary(data)
## GPS.Waypoint Height Width Activity
## Min. : 887 Min. : 5.29 Min. : 7.60 Min. :0.000
## 1st Qu.:1018 1st Qu.:28.75 1st Qu.:28.84 1st Qu.:1.000
## Median :1150 Median :37.28 Median :37.32 Median :2.000
## Mean :1164 Mean :37.42 Mean :37.97 Mean :1.962
## 3rd Qu.:1312 3rd Qu.:46.98 3rd Qu.:47.40 3rd Qu.:3.000
## Max. :1442 Max. :73.96 Max. :90.26 Max. :3.000
##
## Environment Direction Part Count
## Beach:259 U :127 South Beach:523 Min. :1
## Dunes:264 D : 96 1st Qu.:1
## U-L : 71 Median :1
## U-R : 56 Mean :1
## L : 48 3rd Qu.:1
## (Other):123 Max. :1
## NA's : 2
## Latitude Longitude Elevation
## N31\xc1 22.724': 10 W81\xc1 16.088': 7 Min. :-4.000
## N31\xc1 22.758': 9 W81\xc1 16.089': 6 1st Qu.: 1.000
## N31\xc1 22.720': 8 W81\xc1 15.890': 5 Median : 2.000
## N31\xc1 22.735': 8 W81\xc1 15.932': 5 Mean : 2.692
## N31\xc1 22.723': 6 W81\xc1 16.229': 5 3rd Qu.: 4.000
## N31\xc1 22.725': 6 W81\xc1 16.550': 5 Max. :15.000
## (Other) :476 (Other) :490
## Date.Created Oval.Area_mm2 Lon.DMS
## 2018-10-21, 11:04 AM: 2 Min. : 53.76 Min. :-81.28
## 2018-10-21, 11:28 AM: 2 1st Qu.: 674.26 1st Qu.:-81.28
## 2018-10-21, 11:51 AM: 2 Median :1105.25 Median :-81.27
## 2018-10-21, 2:57 PM : 2 Mean :1231.78 Mean :-81.27
## 2018-10-21, 4:39 PM : 2 3rd Qu.:1705.18 3rd Qu.:-81.27
## 2018-10-22, 1:19 PM : 2 Max. :4623.81 Max. :-81.26
## (Other) :511
## Lat.DMS
## Min. :31.38
## 1st Qu.:31.38
## Median :31.38
## Mean :31.38
## 3rd Qu.:31.39
## Max. :31.39
##
# We can see there were 259 burrows on the beach, and 264 burrows on the sand.
# Relationship between Width and Height of the burrows, coloured by environment
plot(data$Height ~ data$Width, col=data$Environment, xlab="Width (mm)",
ylab="Height (mm)", main="Correlation between Height and Width of Burrows")
cor(data$Height, data$Width)
## [1] 0.8147132
linearMod <- lm(Height ~ Width, data=data)
(linearMod)
##
## Call:
## lm(formula = Height ~ Width, data = data)
##
## Coefficients:
## (Intercept) Width
## 8.6465 0.7578
summary(linearMod)
##
## Call:
## lm(formula = Height ~ Width, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.3836 -4.6673 -0.3851 4.4662 27.8435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.64651 0.95584 9.046 <2e-16 ***
## Width 0.75777 0.02363 32.070 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.534 on 521 degrees of freedom
## Multiple R-squared: 0.6638, Adjusted R-squared: 0.6631
## F-statistic: 1028 on 1 and 521 DF, p-value: < 2.2e-16
# Calculate the area of the burrows: formula for the area of an oval: PI x (height/2) x (width/2)
data$Area <- as.numeric(pi*(data$Height/2)*(data$Width/2))
hist(data$Area, xlab="Area of burrows (mm2)", main="Histogram of Area")
#The majority of burrows are "small"
Dunes <- subset(data, data$Environment=="Dunes")
Beach <- subset(data, data$Environment=="Beach")
hist(Beach$Area, ylim=c(0,100),xlim=c(0,5000), main="A. BURROWS", xlab="Size (area) of the burrow (mm2)")
hist(Dunes$Area, ylim=c(0,100), xlim=c(0,5000), main="B. DUNES", xlab="Size (area) of the burrow (mm2)")
# Is the data normally distributed?
shapiro.test(Dunes$Area)
##
## Shapiro-Wilk normality test
##
## data: Dunes$Area
## W = 0.9267, p-value = 3.93e-10
shapiro.test(Beach$Area)
##
## Shapiro-Wilk normality test
##
## data: Beach$Area
## W = 0.93579, p-value = 3.434e-09
qqnorm(Dunes$Area); qqline(Dunes$Area, col=2)
qqnorm(Beach$Area); qqline(Beach$Area, col=2)
# Summarize
summary(Dunes$Area)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 222.4 923.7 1345.9 1471.3 1900.5 4623.8
summary(Beach$Area)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 53.76 402.06 833.05 987.63 1485.58 3299.83
# Compare the normal distribtion using a standard two-sided z test
library(BSDA)
## Loading required package: lattice
##
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
z.test(Dunes$Area, sigma.x=0.5, y=Beach$Area, sigma.y=0.5, mu=2)
##
## Two-sample z-Test
##
## data: Dunes$Area and Beach$Area
## z = 11015, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 2
## 95 percent confidence interval:
## 483.5816 483.7531
## sample estimates:
## mean of x mean of y
## 1471.3010 987.6336
# Plot Area vs Environments
#boxplot(data$Area ~ data$Environment, xlab="Environment", ylab="Area of each burrow (mm2)", main="Area of burrows by environment type")
# with ggplot
library(ggplot2)
library(ggsignif)
ggplot(data, aes(x=Environment, y=Area)) +
geom_boxplot() +
geom_signif(comparisons = list(c("Beach", "Dunes")),
map_signif_level=TRUE)+
ggtitle("C.")+
theme_classic()
p <- ggplot(data, aes(x=Environment, y=Area)) +
geom_violin(trim=FALSE) +
geom_signif(comparisons = list(c("Beach", "Dunes")),
map_signif_level=TRUE)
p
# Measure the variance in the beach and dunes data
range(Beach$Area)
## [1] 53.76255 3299.82682
range(Dunes$Area)
## [1] 222.3839 4623.8102
data_summary <- function(x) {
m <- mean(x)
ymin <- m-sd(x)
ymax <- m+sd(x)
return(c(y=m,ymin=ymin,ymax=ymax))
}
p + stat_summary(fun.data=data_summary)
We used an activity scale to determine if the burrow were active or not. 0= Burrow hole not clear; 1= Clear burrow hole but not traces or sand kickback; 2= Clear hole plus 1 indicator (traces or kickback), 3= Very clear hole AND clear presence of traces and/or kickback
# Plot activity by environment
data$Activity <- as.vector(data$Activity)
# Grouped Bar Plot
counts.of.activity <- table(data$Environment, data$Activity)
barplot(counts.of.activity, ylab=c(0,150), main="Activity level by environment type",
xlab="Activity Level Scale", col=c("Red","Yellow"),
legend = rownames(counts.of.activity), beside=TRUE)
#ANOVA
res.aov <- aov(Activity ~ Environment, data = data)
# Summary of the analysis
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Environment 1 32.2 32.22 34.33 8.26e-09 ***
## Residuals 521 489.0 0.94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Ok there is a significant difference between the activity levels between the beach and dunes, but how can I test if there is a significant different *within* each activity level scale, between the two environments?
We hypothesize that on the beach, the burrows would be away from the tidal lines, wherease on the dunes it doesn’t matter.
data$Direction <- as.vector(data$Direction)
# Grouped Bar Plot
counts.of.orientation <- table(data$Environment, data$Direction)
barplot(counts.of.orientation, ylim = c(0,150), main="Orientation of the burrows by environment type",
xlab="Burrow orientation compared to tidal line", col=c("Red","Yellow"),
legend = rownames(counts.of.orientation), beside=TRUE)
plot(data$Lat.DMS ~ data$Lon.DMS, col=data$Environment, cex=0.5)
# red dots are Dunes, black are beach
Let’s try to plot this into a 3D plot:
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.4.4
s3d <- scatterplot3d(x = data$Lon.DMS,
y = data$Lat.DMS,
z = data$Elevation,
cex.symbols=0.5,
color=c(data$Environment),
xlab="Longitude",
ylab="Latitude",
zlab="Elevation(m)",
angle = 85)
# Here the black = Beach and Red = Dunes
From here on, I move into ArcGIS to do the analysis of the points.
To do this, I go under Analysis, then Analyse Patterns, then Calculate Density. From this, we see that there is a high density towards the southern tip of the beach, between the second and last runnels, and before reaching the dunes.
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.3
include_graphics("/Users/patriciatran/Desktop/GIS-clustering-ghost-crabs.png")
GIS, import data, trace a tideline (create new tide feature), use the Tool Near Point to Line
gis.data <- read.csv("ghost-crab-GIS-analysis.csv")
plot(gis.data$Oval_Area_mm2~gis.data$Dist_near_tideline, col=gis.data$Environment,
xlab="Distance to tideline (m)",
ylab="Size of the burrow opening (mm2)")
# Ok so clearly no relatioship between distance to shoreline and size of the burrows
cor(x=gis.data$Dist_near_tideline, gis.data$Oval_Area_mm2)
## [1] 0.2163122
linearMod.disttide.area <- lm(Oval_Area_mm2 ~ Dist_near_tideline, data=gis.data)
(linearMod.disttide.area)
##
## Call:
## lm(formula = Oval_Area_mm2 ~ Dist_near_tideline, data = gis.data)
##
## Coefficients:
## (Intercept) Dist_near_tideline
## 775.861 4.251
summary(linearMod.disttide.area)
##
## Call:
## lm(formula = Oval_Area_mm2 ~ Dist_near_tideline, data = gis.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1211.6 -536.2 -155.1 466.4 3195.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 775.8611 95.9146 8.089 4.25e-15 ***
## Dist_near_tideline 4.2514 0.8407 5.057 5.91e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 748.8 on 521 degrees of freedom
## Multiple R-squared: 0.04679, Adjusted R-squared: 0.04496
## F-statistic: 25.57 on 1 and 521 DF, p-value: 5.905e-07
Are the burrows in the dunes or on the shore closer to each other?
library(ggplot2)
library(ggsignif)
ggplot(gis.data, aes(x=Environment, y=Dist_near_burrow)) +
geom_boxplot() +
geom_signif(comparisons = list(c("Beach", "Dunes")),
map_signif_level=TRUE)+
ylab("Distance to nearest burrow(m)")+
xlab("Environment")+
theme_classic()
Weather data
weather <- read.csv("~/Documents/GitHub/ghost-crabs/Data/NOAA-Sapelo-Weather.tsv", sep="\t")
# Get daily averages:
daily_weather <- aggregate(weather[, 3:ncol(weather)], list(weather$DD), mean)
plot(daily_weather$WDIR ~ daily_weather$Group.1, xlab="Day (October)", ylab="Wind direction")